#### Set up ####
remove(list = ls())
library(haven)
library(tidyverse)
library(writexl)
library(y2clerk)
library(sylcount)
library(tidymodels)
library(stargazer)
library(sjmisc)
library(descr)
# library(MASS)
# step <- read_dta("~/Desktop/CurrentResearch/Cap-punishment-race-religion/GSS_stata/GSS7218_R3.DTA")
# Read the GSS Stata extract, drop the class attribute from every column, and
# keep the result as a plain data.frame.
responses <- read_dta("data/gss7221_r1a.dta")
responses[] <- lapply(responses, unclass)
responses <- as.data.frame(responses)
#### Create new racial resentment index (based on Carter and Corra) ####
# Each racdif item becomes a 0/1 indicator (any code other than 1 or 2 is NA);
# cc2016_rr is the sum of the three indicators.
responses <- responses %>%
  mutate(
    rr_education = case_when(
      racdif3 == 2 ~ 1,
      racdif3 == 1 ~ 0
    ),
    rr2_lackwill = case_when(
      racdif4 == 2 ~ 0,
      racdif4 == 1 ~ 1
    ),
    rr3_discrimination = case_when(
      racdif1 == 2 ~ 1,
      racdif1 == 1 ~ 0
    ),
    cc2016_rr = as.numeric(rr_education + rr2_lackwill + rr3_discrimination)
  )

#### Creating other RR indices ####
responses <- responses %>%
  mutate(
    # Tuch and Hughes modified (rr_index_new): wrkwayup is dichotomised
    # (codes up to 2 -> 1, codes above 2 -> 0) and added to two racdif items.
    rr_single_binary = case_when(
      wrkwayup > 2 ~ 0,
      wrkwayup <= 2 ~ 1
    ),
    rr_index_new = rr2_lackwill + rr3_discrimination + rr_single_binary,
    # Single question: wrkwayup reverse-coded so that 1 -> 5 ... 5 -> 1.
    rr_single = case_when(
      wrkwayup == 5 ~ 1,
      wrkwayup == 4 ~ 2,
      wrkwayup == 3 ~ 3,
      wrkwayup == 2 ~ 4,
      wrkwayup == 1 ~ 5
    )
  )

# Tuch and Hughes exact replication created in Stata

#### Recode Punitive Variables ####
# Each item is rescaled to the 0-1 range; unlisted codes become NA.
responses <- responses %>%
  mutate(
    punish1_courts = case_when(
      courts == 3 ~ 1,
      courts == 2 ~ 0.5,
      courts == 1 ~ 0
    ),
    punish2_dp = case_when(
      cappun == 1 ~ 1,
      cappun == 2 ~ 0
    ),
    punish3_crimespending = case_when(
      natcrime == 1 ~ 1,
      natcrime == 2 ~ 0.5,
      natcrime == 3 ~ 0
    )
  )


#### Recode Religious Belief Variables ####
# punsin, hell and theism are reverse-coded; bible is collapsed to a 0/1
# indicator that is 1 only for code 1.
responses <- responses %>%
  mutate(
    punish_sin = case_when(
      punsin == 4 ~ 1,
      punsin == 3 ~ 2,
      punsin == 2 ~ 3,
      punsin == 1 ~ 4
    ),
    believe_hell = case_when(
      hell == 4 ~ 1,
      hell == 3 ~ 2,
      hell == 2 ~ 3,
      hell == 1 ~ 4
    ),
    bible_literal = case_when(
      bible == 1 ~ 1,
      bible %in% c(2, 3, 4, 8, 9) ~ 0
    ),
    god_concerned = case_when(
      theism == 5 ~ 1,
      theism == 4 ~ 2,
      theism == 3 ~ 3,
      theism == 2 ~ 4,
      theism == 1 ~ 5
    )
  )

#### Code control variables ####
# Sex (female = 1 for sex code 2) and a text label for each degree code.
responses <- responses %>%
  mutate(
    female = case_when(
      sex == 1 ~ 0,
      sex == 2 ~ 1
    ),
    educ_degree = case_when(
      degree == 0 ~ "Less than high school",
      degree == 1 ~ "High school",
      degree == 2 ~ "Associate or junior college",
      degree == 3 ~ "Bachelor's",
      degree == 4 ~ "Graduate"
    )
  )

# Church attendance: collapse the attend codes into seven labelled categories,
# then score the categories 1-7 in the order listed in attend_order.
attend_order <- c("Never",
                  "Rarely",
                  "Few times a year",
                  "Once a month",
                  "Nearly every week",
                  "Every week",
                  "More than once a week")

responses <- responses %>%
  mutate(
    church_attend_label = case_when(
      attend == 0 ~ "Never",
      attend %in% c(1, 2) ~ "Rarely",
      attend == 3 ~ "Few times a year",
      attend == 4 ~ "Once a month",
      attend %in% c(5, 6) ~ "Nearly every week",
      attend == 7 ~ "Every week",
      attend == 8 ~ "More than once a week"
    ),
    # Position of the label in attend_order, stored as a double; NA stays NA.
    church_attend = as.numeric(match(church_attend_label, attend_order))
  )

# Four age bands; ages below 18 or missing are left as NA.
responses <- responses %>%
  mutate(
    age_group = case_when(
      age >= 60 ~ "60 and older",
      45 <= age & age <= 59 ~ "45-59",
      30 <= age & age <= 44 ~ "30-44",
      18 <= age & age <= 29 ~ "18-29"
    )
  )

# polviews5 collapses the seven polviews codes to five (2-3 and 5-6 merged).
# pid3 collapses partyid to three groups: 0-1 -> 1, 2-4 and 7 -> 2, 5-6 -> 3.
responses <- responses %>%
  mutate(
    polviews5 = case_when(
      polviews == 1 ~ 1,
      polviews %in% c(2, 3) ~ 2,
      polviews == 4 ~ 3,
      polviews %in% c(5, 6) ~ 4,
      polviews == 7 ~ 5
    ),
    pid3 = case_when(
      partyid %in% c(0, 1) ~ 1,
      partyid %in% c(2, 3, 4, 7) ~ 2,
      partyid %in% c(5, 6) ~ 3
    )
  )

#### Burge RELTRAD coding ####
library(remotes)
# Install socsci from GitHub only when it is not already available, so that a
# routine run of this script does not hit the network or change the installed
# version of the package.
if (!requireNamespace("socsci", quietly = TRUE)) {
  remotes::install_github("ryanburge/socsci")
}
library(socsci)
library(car)

# Read your data in as gss
# gss <- read.fst("D://clean_gss.fst")

#####> naffil xaffil
# naffil collapses relig: 1, 11, 13 -> 1; 2 -> 4; 3 -> 5; 4 -> 9;
# 5-10 and 12 -> 6; everything else -> 0. xaffil is the labelled version.
gss <- as_tibble(responses) %>%
  mutate(
    naffil = car::recode(relig, "1=1;2=4;3=5;4=9;5:10=6;11=1;12=6;13=1;else=0"),
    xaffil = factor(
      naffil,
      levels = c(0, 1, 4, 5, 6, 9),
      labels = c(NA, "prot", "cath", "jew", "other", "nonaf")
    )
  )


#####> Black Protestants
# Builds xbp, a 0/1 flag, in several passes. After the first pass every step
# can only switch the flag on (the fallback arm is always TRUE ~ xbp).
gss <- gss %>% 
  # Pass 1: 1 for the listed `other` codes, 0 for everything else.
  mutate(xbp = car::recode(other,"7=1;14=1;15=1;21=1;37=1;38=1;56=1;78=1;79=1;85=1;86=1;87=1;88=1;98=1;103=1;104=1;128=1;133=1;else=0")) %>% 
  # Pass 2: these denom codes set the flag without any condition on race.
  mutate(
    xbp = case_when(
      denom == 12 ~ 1, denom == 13 ~ 1, denom == 20 ~ 1, denom == 21 ~ 1, TRUE ~ xbp)) %>% 
  # bl is 1 where race == 2 and 0 otherwise; multiplying by it zeroes out
  # denom for every row with bl == 0.
  mutate(bl = race) %>% 
  mutate(bl = car::recode(bl, "2=1;else=0")) %>% 
  mutate(bldenom = denom*bl) %>% 
  # Pass 3: these denom codes set the flag only where bl == 1.
  mutate(
    xbp = case_when(
      bldenom == 23 ~ 1, bldenom == 28 ~ 1, bldenom == 18 ~ 1, bldenom == 15 ~ 1, bldenom == 10 ~ 1, bldenom == 11 ~ 1, bldenom == 14 ~ 1, TRUE ~ xbp)) %>% 
  # Pass 4: `other` code 93 sets the flag only where bl == 1.
  mutate(blother = other*bl) %>% 
  mutate(
    xbp = case_when(
      blother == 93 ~ 1,
      TRUE ~ xbp))

#####> xev 
# Builds xev, a 0/1 flag, with the same layered pattern used for xbp, then
# clears it wherever xbp is already 1 so the two flags never overlap.
gss <- gss %>% 
  # Pass 1: 1 for the listed `other` codes, 0 for everything else.
  mutate(xev = car::recode(other, "2=1;3=1;5=1;6=1;9=1;10=1;12=1;13=1;16=1;18=1;
                      20=1;22=1;24=1;26=1;27=1;28=1;31=1;32=1;34=1;35=1;36=1;
                      39=1;41=1;42=1;43=1;45=1;47=1;51=1;52=1;53=1;55=1;57=1;
                      63=1;65=1;66=1;67=1;68=1;69=1;76=1;77=1;83=1;84=1;90=1;
                      91=1;92=1;94=1;97=1;100=1;101=1;102=1;106=1;107=1;108=1;
                      109=1;110=1;111=1;112=1;115=1;116=1;117=1;118=1;120=1;
                      121=1;122=1;124=1;125=1;127=1;129=1;131=1;132=1;134=1;
                      135=1;138=1;139=1;140=1;146=1;else=0")) %>% 
  # Pass 2: these denom codes set the flag without any condition on race.
  mutate(
    xev = case_when(
      denom == 32 ~ 1,
      denom == 33 ~ 1,
      denom == 34 ~ 1,
      denom == 42 ~ 1,
      TRUE ~ xev)) %>% 
  # wh maps race 1 and 3 to 1 and race 2 to 0; whdenom is denom where wh is 1
  # and 0 where wh is 0.
  mutate(wh = car::recode(race, "1=1;2=0;3=1")) %>% 
  mutate(whdenom = denom*wh) %>% 
  # Pass 3: these denom codes set the flag only where wh == 1.
  mutate(
    xev = case_when(
      whdenom == 23 ~ 1,
      whdenom == 18 ~ 1,
      whdenom == 15 ~ 1,
      whdenom == 10 ~ 1,
      whdenom == 14 ~ 1,
      TRUE ~ xev)) %>% 
  # Pass 4: `other` code 93 sets the flag only where wh == 1.
  mutate(whother = other*wh) %>% 
  mutate(
    xev = case_when(
      whother == 93 ~ 1,
      TRUE ~ xev)) %>% 
  # Final override: a row already flagged xbp is never also flagged xev.
  mutate(
    xev = case_when(
      xbp == 1 ~ 0,
      TRUE ~ xev))

#####> xml 
# Builds xml, a 0/1 flag: 1 for the listed `other` codes, then switched on for
# the listed denom codes (any race) and for whdenom 11 or 28. whdenom is
# denom * wh, created in the xev step, so this block must run after it.
gss <- gss %>%
  mutate(xml = car::recode(other, "1=1;8=1;19=1;23=1;25=1;40=1;44=1;46=1;48=1;49=1;50=1;54=1;70=1;71=1;72=1;73=1;81=1;89=1;96=1;99=1;105=1;119=1;148=1;else=0")) %>% 
  mutate(
    xml = case_when(
      denom == 22 ~ 1, denom == 30 ~ 1, denom == 31 ~ 1, denom == 35 ~ 1, denom == 38 ~ 1, denom == 40 ~ 1, denom == 41 ~ 1, denom == 43 ~ 1, denom == 48 ~ 1, denom == 50 ~ 1, whdenom == 11 ~ 1, whdenom == 28 ~ 1, TRUE ~ xml))

#####> xcath xjew xother
# Builds three more 0/1 flags from `other`, naffil and xev.
gss <- gss %>% 
  # xcath: `other` code 123, or naffil == 4.
  mutate(xcath = car::recode(other, "123=1;else=0")) %>% 
  mutate(
    xcath = case_when(
      naffil == 4 ~ 1, TRUE ~ xcath
    )) %>% 
  # xjew: naffil == 5 only.
  mutate(
    xjew = case_when(
      naffil == 5 ~ 1, TRUE ~ 0
    )) %>% 
  # xother: starts from the listed `other` codes.
  mutate(xother = car::recode(other, "11=1;17=1;29=1;30=1;33=1;58=1;59=1;60=1;61=1;62=1;64=1;74=1;75=1;80=1;82=1;95=1;113=1;114=1;130=1;136=1;141=1;145=1;else=0")) %>% 
  # noxev is 1 where xev is 0, so noxevxaf equals naffil for rows that are not
  # xev and 0 for rows that are; it is 6 only when xev == 0 and naffil == 6.
  mutate(noxev = 1-xev) %>% 
  mutate(noxevxaf = noxev*naffil) %>% 
  mutate(
    xother = case_when(
      noxevxaf == 6 ~ 1,
      TRUE ~ xother))

#####> xnonaff xprotdk
# xnd is 1 when any of naffil == 9, denom == 70, relig == 11 or relig == 13
# holds, else 0. xnonaff is 1 only for naffil == 9.
gss <- gss %>%
  mutate(
    xnd = case_when(
      naffil == 9 | denom == 70 | relig %in% c(11, 13) ~ 1,
      TRUE ~ 0
    ),
    xnonaff = car::recode(naffil, "9=1;else=0")
  )

#####> reltrad
# The first matching flag wins, so the order of the arms below sets the
# priority among overlapping flags. The numeric code is then replaced by its
# label with frcode().
gss <- gss %>%
  mutate(
    reltrad = case_when(
      xnonaff == 1 ~ 7,
      xother == 1 ~ 6,
      xjew == 1 ~ 5,
      xcath == 1 ~ 4,
      xbp == 1 ~ 3,
      xml == 1 ~ 2,
      xev == 1 ~ 1,
      xnd == 1 ~ 8,
      TRUE ~ 9
    ),
    reltrad = frcode(
      reltrad == 1 ~ "Evangelical Protestant",
      reltrad == 2 ~ "Mainline Protestant",
      reltrad == 3 ~ "Black Protestant",
      reltrad == 8 ~ "Non-Denom. Protestant",
      reltrad == 4 ~ "Catholic",
      reltrad == 5 ~ "Jewish",
      reltrad == 6 ~ "Other Faith",
      reltrad == 7 ~ "Non-affiliate",
      TRUE ~ "Unclassified"
    )
  )

# library(descr)

# gss %>% filter(year == 2016) %>% 
#   freqs(reltrad, 
#         wt = wtssall)

#### Relevel factors and merge Jewish and Unclassified into the other religion category ####
gss <- gss %>%
  mutate(
    reltrad = as.character(reltrad),
    reltrad = case_when(
      reltrad %in% c("Jewish", "Unclassified") ~ "Other Faith",
      TRUE ~ reltrad
    ),
    # Explicit level order; "Non-affiliate" is the first level.
    reltrad = factor(
      reltrad,
      levels = c(
        "Non-affiliate",
        "Other Faith",
        "Catholic",
        "Non-Denom. Protestant",
        "Black Protestant",
        "Mainline Protestant",
        "Evangelical Protestant"
      )
    )
  )

# Carry the RELTRAD columns back to responses and reverse the level order of
# educ_degree.
responses <- gss %>%
  mutate(educ_degree = fct_rev(educ_degree))

#### Make binary education variable ####
# hs: 1 = high school or more, 0 = less than high school.
# educ_degree is a factor of text labels at this point, so it has to be
# compared against those labels. Comparing it against the integers 1-5 never
# matches and leaves hs as NA for every row.
responses <- responses %>%
  mutate(hs = case_when(educ_degree == "Less than high school" ~ 0,
                        educ_degree %in% c("High school",
                                           "Associate or junior college",
                                           "Bachelor's",
                                           "Graduate") ~ 1))

# bach_degree: 1 for degree codes 3 and 4, 0 for codes 0-2, NA otherwise.
responses <- responses %>%
  mutate(bach_degree = case_when(degree %in% c(3, 4) ~ 1,
                                 degree %in% c(0, 1, 2) ~ 0))

# 3 or greater

#### Create income variable ####
# Rows with no recorded degree are given degree code 1 so that they belong to
# a degree group for the imputation below. This overwrites degree in place.
responses[is.na(responses$degree), "degree"] <- 1

# Median of the observed coninc values within each degree code.
cnnc <- responses[!is.na(responses$coninc), ]
agg.cnnc <- aggregate(cnnc$coninc, by = list(cnnc$degree), FUN = median)
colnames(agg.cnnc) <- c("degree", "coninc.median")

# Fill every missing coninc with the median of that row's degree group.
for (i in agg.cnnc$degree) {
  needs_fill <- is.na(responses$coninc) & responses$degree == i
  responses[needs_fill, "coninc"] <- agg.cnnc[agg.cnnc$degree == i, "coninc.median"]
}

#### Final recoding steps ####
responses <- responses %>%
  mutate(
    reltrad = as.factor(reltrad),
    educ_degree = as.factor(educ_degree),
    # hs: 0 for less than high school, 1 for every other degree label.
    hs = case_when(
      educ_degree == "Less than high school" ~ 0,
      educ_degree %in% c("High school",
                         "Associate or junior college",
                         "Bachelor's",
                         "Graduate") ~ 1
    ),
    # south: region codes 5, 6 and 7 -> 1, any other non-missing region -> 0.
    south = case_when(
      region %in% c(5, 6, 7) ~ 1,
      !is.na(region) ~ 0
    ),
    # Race indicators: 1/0 for non-missing race, NA where race is missing.
    white = as.numeric(race == 1),
    black = as.numeric(race == 2),
    other_race = as.numeric(race == 3)
  )

#### Select only the variables we use ####
# Columns are written out in exactly this order.
keep_vars <- c(
  "punish2_dp", "reltrad", "rr_index_new", "bible_literal", "church_attend",
  "polviews", "black", "other_race", "coninc", "south",
  "age", "age_group", "bach_degree", "hs", "female",
  "pid3", "partyid", "year", "natrace", "racopen",
  "courts", "verdict", "rr_single_binary", "rr_single", "cc2016_rr",
  "rr2_lackwill", "rr3_discrimination", "rr_education", "racpush", "helpblk"
)

responses_select <- responses %>%
  dplyr::select(all_of(keep_vars))

#### Save out data ####
write_dta(data = responses_select, path = "data/gss-cappun-CLEAN-restrictVars.dta")
